import urllib.request

# Fetch the Sina home page using the headers of a logged-in browser session
# and save the returned HTML to 'weibo.html'.
url = 'https://www.sina.com.cn/'
headers = {
    # The cookie carries the login information; sending a post-login cookie
    # lets the request reach pages that require an authenticated session.
    # NOTE(review): this is a session credential hard-coded in source. It
    # should not be committed; consider loading it from the environment or a
    # local, untracked config file.
    'Cookie': 'SUB=_2A25FoHjvDeRhGe9P7lYR9CfEyzSIHXVm3PQnrDV_PUNbm9ANLRL4kW9NdU9sd5FMsMcaSRXq50UbE8n8g5_jyG1X; SUBP=0033WrSXqPxfM725Ws9jqgMF55529P9D9WFodGNzomlFJjG.Qa-CeMxx5JpX5KzhUgL.Fo.pSKB7Sh.Rehn2dJLoIp7LxKML1KBLBKnLxKqL1hnLBoM4eK-XehB41h5R; ALF=1758172607; U_TRS1=0000005f.7ebd2fcd.68a408bf.daaa35c0; U_TRS2=0000005f.7ec72fcd.68a408bf.1fa89287; UOR=my.sina.com.cn,www.sina.com.cn,; SINAGLOBAL=111.37.144.95_1755580613.925790; Apache=111.37.144.95_1755580613.925792; name=sinaAds; post=massage; SGUID=1755580614142_54946350; Hm_lvt_90c40f528e0b2106bc03da5aadec190f=1755580615; HMACCOUNT=E6DF74674AA9618B; NowDate=Tue Aug 19 2025 13:17:04 GMT+0800 (ä¸­å›½æ ‡å‡†æ—¶é—´); ULV=1755580624045:2:2:2:111.37.144.95_1755580613.925792:1755580613731; Hm_lpvt_90c40f528e0b2106bc03da5aadec190f=1755580625',
    # Referer tells the server which page the request came from; it is
    # typically checked for image hotlink protection.
    'Referer': 'https://my.sina.com.cn/',
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
}

# Header values are sent latin-1 encoded, so drop every character that cannot
# be represented in latin-1 (the Cookie's NowDate field contains mis-decoded
# text). A new dict is built instead of rewriting the one being iterated.
headers = {
    key: value.encode('latin-1', errors='ignore').decode('latin-1')
    for key, value in headers.items()
}

request = urllib.request.Request(url=url, headers=headers)
# The context manager closes the connection even if reading or decoding
# fails; the timeout keeps an unresponsive server from hanging the script.
with urllib.request.urlopen(request, timeout=30) as response:
    # Prefer the charset declared in the Content-Type header and fall back to
    # UTF-8 when the server does not declare one.
    charset = response.headers.get_content_charset() or 'utf-8'
    content = response.read().decode(charset)

with open('weibo.html', 'w', encoding='utf-8') as fp:
    fp.write(content)